Distinct echo substrings [KMP, Sliding Window, Rabin-Karp]
Time: O(N^2+D); Space: O(R); hard
Return the number of distinct non-empty substrings of text that can be written as the concatenation of some string with itself (i.e. it can be written as a + a where a is some string).
Example 1:
Input: text = “abcabcabc”
Output: 3
Explanation:
The 3 substrings are “abcabc”, “bcabca” and “cabcab”.
Example 2:
Input: text = “leetcodeleetcode”
Output: 2
Explanation:
The 2 substrings are “ee” and “leetcodeleetcode”.
Constraints:
1 <= len(text) <= 2000
text has only lowercase English letters.
Hints:
Given a substring of the text, how can we check whether it can be written as the concatenation of a string with itself?
We can do that in linear time; a faster way is to use hashing.
Try all substrings and use hashing to check them.
[1]:
class Solution1(object):
    """
    KMP
    Time:  O(n^2 + d), d is the total size of the substrings added to the set
    Space: O(r), r is the size of the result substrings set
    """

    def distinctEchoSubstrings(self, text):
        """
        Count the distinct non-empty substrings of the form a + a.

        :type text: str
        :rtype: int
        """
        def scan_suffix(start, found):
            # Build the KMP failure table of text[start:]; fail[p] holds
            # (longest proper border length of the prefix ending at p) - 1.
            size = len(text) - start
            fail = [-1] * size
            k = -1
            for pos in range(1, size):
                ch = text[start + pos]
                while k >= 0 and text[start + k + 1] != ch:
                    k = fail[k]
                if text[start + k + 1] == ch:
                    k += 1
                fail[pos] = k

                length, border = pos + 1, k + 1
                if not border:
                    continue
                # The prefix is an even number of copies of its smallest
                # period exactly when it can be split into two equal halves.
                period = length - border
                if length % period == 0 and (length // period) % 2 == 0:
                    found.add(text[start:start + length])

            # If the whole suffix is a repetition of one unit, report the
            # unit length so the caller can stop scanning early.
            border = fail[-1] + 1
            period = size - border
            if border and size % period == 0:
                return period
            return float("inf")

        found = set()
        start, limit = 0, len(text) - 1
        # e.g. aaaaaaaaaaaaaaaaaaaaaaaaaaaaaabcdefabcdefabcdef
        while start < limit:
            # Starts at or beyond start + period only repeat substrings that
            # were already seen one period earlier.
            limit = min(limit, start + scan_suffix(start, found))
            start += 1
        return len(found)
[2]:
# Smoke-test Solution1 against the two examples from the problem statement.
s = Solution1()
for text, expected in (("abcabcabc", 3), ("leetcodeleetcode", 2)):
    assert s.distinctEchoSubstrings(text) == expected
[3]:
class Solution2(object):
    """
    Sliding window
    Time:  O(n^2 + d), d is the total size of the substrings added to the set
    Space: O(r), r is the size of the result substrings set
    """

    def distinctEchoSubstrings(self, text):
        """
        Count the distinct non-empty substrings of the form a + a.

        :type text: str
        :rtype: int
        """
        n = len(text)
        halves = set()
        for half in range(1, n // 2 + 1):
            last = n - 2 * half  # final start index of a window of 2*half
            # matches = number of offsets k in the current window for which
            # text[start + k] == text[start + k + half].
            matches = 0
            for k in range(half):
                if text[k] == text[k + half]:
                    matches += 1
            for start in range(last):
                if matches == half:
                    halves.add(text[start:start + half])
                # Slide right by one: take in the new pair, drop the old one.
                if text[start + half] == text[start + 2 * half]:
                    matches += 1
                if text[start] == text[start + half]:
                    matches -= 1
            if matches == half:
                halves.add(text[last:last + half])
        # Distinct halves correspond one-to-one with distinct echo substrings.
        return len(halves)
[4]:
# Smoke-test Solution2 against the two examples from the problem statement.
s = Solution2()
for text, expected in (("abcabcabc", 3), ("leetcodeleetcode", 2)):
    assert s.distinctEchoSubstrings(text) == expected
[5]:
class Solution3(object):
    """
    Rabin-Karp (rolling hash as a filter, verified by direct comparison)
    Time:  O(n^2 + d), d is the total size of the halves that are compared
           and stored after a hash match
    Space: O(r), r is the size of the result substrings set
    """

    def distinctEchoSubstrings(self, text):
        """
        Count the distinct non-empty substrings of the form a + a.

        For every split point i, the half ending at i is grown leftwards and
        the half starting at i + 1 is grown rightwards, one character at a
        time, while their polynomial hashes are maintained incrementally.
        A hash match is only a candidate: it is confirmed by comparing the
        two halves, and the half itself (not its hash) is stored, so modular
        collisions can neither add false echoes nor merge distinct ones.

        :type text: str
        :rtype: int
        """
        MOD = 10**9 + 7
        D = 27  # a-z map to 1..26; 0 is reserved for ''
        base = ord('a') - 1
        n = len(text)
        result = set()
        for i in range(n - 1):
            left, right, pow_D = 0, 0, 1
            # l is the half length; both halves must fit inside text.
            for l in range(1, min(i + 2, n - i)):
                # Both hashes weight the k-th character of their half
                # (counted from its left end) by D**k.
                left = (D * left + (ord(text[i - l + 1]) - base)) % MOD
                right = (pow_D * (ord(text[i + l]) - base) + right) % MOD
                if left == right:
                    half = text[i + 1:i + 1 + l]
                    # Guard against hash collisions before accepting.
                    if text[i - l + 1:i + 1] == half:
                        result.add(half)
                pow_D = (pow_D * D) % MOD
        return len(result)
[6]:
# Smoke-test Solution3 against the two examples from the problem statement.
s = Solution3()
for text, expected in (("abcabcabc", 3), ("leetcodeleetcode", 2)):
    assert s.distinctEchoSubstrings(text) == expected
[7]:
class Solution_TLE(object):
    """
    Rabin-Karp with character-by-character verification (too slow to pass)
    Time:  O(n^3 + d), d is the total size of the substrings added to the set
    Space: O(r), r is the size of the result substrings set
    """

    def distinctEchoSubstrings(self, text):
        """
        Count the distinct non-empty substrings of the form a + a.

        :type text: str
        :rtype: int
        """
        def same_run(length, first, second):
            # True when the two length-sized runs starting at the given
            # indices hold the same characters.
            return all(text[first + k] == text[second + k]
                       for k in range(length))

        MOD = 10**9 + 7
        D = 27  # a-z and ''
        offset = ord('a') - 1
        n = len(text)
        seen = set()
        for mid in range(n):
            hash_l, hash_r, weight = 0, 0, 1
            upper = min(mid + 2, n - mid)
            for size in range(1, upper):
                lo = mid - size + 1  # start of the left half
                hi = mid + 1         # start of the right half
                hash_l = (D * hash_l + (ord(text[lo]) - offset)) % MOD
                hash_r = (weight * (ord(text[mid + size]) - offset)
                          + hash_r) % MOD
                # Hash equality is only a filter; confirm on the characters.
                if hash_l == hash_r and same_run(size, lo, hi):
                    seen.add(text[hi:hi + size])
                weight = (weight * D) % MOD
        return len(seen)
[8]:
# Smoke-test Solution_TLE against the two examples from the problem statement.
s = Solution_TLE()
for text, expected in (("abcabcabc", 3), ("leetcodeleetcode", 2)):
    assert s.distinctEchoSubstrings(text) == expected